# OTU table
otu_table = read_tsv("../data/otu-table.tsv", skip = 1)
otu_id = otu_table$`#OTU ID`
otu_table = data.frame(otu_table[, -1], check.names = FALSE, row.names = otu_id)
# Taxonomy table
tax = read_tsv("../data/taxonomy.tsv")
otu_id = tax$`Feature ID`
tax = data.frame(tax[, - c(1, 3)], row.names = otu_id)
tax = tax %>% separate(col = Taxon,
into = c("Kingdom", "Phylum", "Class", "Order",
"Family", "Genus", "Species"),
sep = ";")
for (i in 1:ncol(tax)) {
tax[, i] = sapply(tax[, i], function(x) str_split(x, "__")[[1]][2])
}
tax = as.matrix(tax)
tax[tax == ""] = NA
# Tree
tree = read_tree("../data/tree.nwk")
# Meta data
meta_data = read_tsv("../data/metadata.tsv")
meta_data = meta_data %>%
mutate_if(is.character, as.factor)
meta_data$sampleid = as.character(meta_data$sampleid)
meta_data$caregiver_stress_level = factor(meta_data$caregiver_stress_level,
levels = c("Low", "Medium", "High"))
meta_data$depression_level = factor(meta_data$depression_level,
levels = c("Low", "Medium", "High"))
meta_data$hostility_level = factor(meta_data$hostility_level,
levels = c("Low", "Medium", "High"))
meta_data$das_level = factor(meta_data$das_level,
levels = c("Low", "Medium", "High"))
meta_data$metabolic_syndrome_level = factor(meta_data$metabolic_syndrome_level,
levels = c("Low", "Medium", "High"))
OTU = otu_table(otu_table, taxa_are_rows = TRUE)
META = sample_data(meta_data)
sample_names(META) = meta_data$sampleid
TAX = tax_table(tax)
otu_data = phyloseq(OTU, TAX, META, tree)
# Aggregate taxa
genus_data = aggregate_taxa(otu_data, "Genus")
genus_data2 = merge_taxa2(genus_data, pattern = "\\Clostridium", name = "Clostridium")
genus_rarefied = rarefy_even_depth(genus_data2, rngseed = 1,
sample.size = 0.9 * min(sample_sums(genus_data2)),
replace = FALSE)
P-value is obtained by Kruskal-Wallis Rank Sum Test.
d_alpha = alpha(genus_rarefied, index = "diversity_shannon")
df_alpha = data.frame(d = d_alpha$diversity_shannon,
meta(genus_rarefied) %>%
dplyr::select(alcohol,
caregiver_stress_level:metabolic_syndrome_level))
covariates = colnames(df_alpha)[-1]
labels = c("Alcohol Use", "Caregiver Stress Level", "Depression Level",
"Hostility Level", "DAS Level", "Metabolic Syndrome Level")
for (i in seq_along(covariates)) {
cat("\n \n \n")
covariate = covariates[i]
label = labels[i]
tformula = formula(paste0("d ~ ", covariate))
p_val = kruskal.test(tformula, data = df_alpha)$p.value
df_ann = data.frame(x = 2.2, y = 1.01 * max(df_alpha$d), p = p_val)
df_ann = df_ann %>%
mutate(label = paste0("p = ", signif(p, 2)))
df_fig = df_alpha %>%
transmute(x = !!as.name(covariate), y = d)
df_fig = df_fig[complete.cases(df_fig), ]
p = df_fig %>%
ggplot(aes(x = x, y = y)) +
geom_violin(trim = FALSE, aes(fill = x)) +
geom_boxplot(width = 0.1, fill = "white") +
scale_fill_discrete(name = NULL) +
geom_label(data = df_ann, aes(x = x, y = y, label = label),
size = 4, vjust = -0.5, hjust = 0, color = "orange") +
labs(x = NULL, y = "Shannon’s diversity index", title = label) +
theme_bw() +
theme(strip.background = element_rect(fill = "white"),
legend.position = "bottom",
plot.title = element_text(hjust = 0.5))
print(p)
cat("\n \n \n")
fig_path1 = paste0("../images/diversity/genus_alpha_", covariate, ".jpeg")
fig_path2 = paste0("../images/diversity/genus_alpha_", covariate, ".pdf")
ggsave(filename = fig_path1, plot = p, height = 5, width = 6.25,
units = 'in', dpi = 300)
ggsave(filename = fig_path2, plot = p, height = 5, width = 6.25)
}
P-values are obtained by Permutational Multivariate Analysis of Variance (PERMANOVA) and Permutational Analysis of Multivariate Dispersion (PERMDISP).
for (i in seq_along(covariates)) {
cat("\n \n \n")
covariate = covariates[i]
label = labels[i]
pseq = genus_rarefied
sample_data(pseq)$covariate = meta(pseq)[, covariate]
pseq_subset = subset_samples(pseq, !is.na(covariate))
set.seed(123)
# PERMANOVA
permanova = adonis(t(abundances(pseq_subset)) ~ covariate,
data = meta(pseq_subset),
permutations = 999, method = "bray")$aov.tab
# PERMDISP
dis = vegdist(t(abundances(pseq_subset)), method = "bray")
groups = sample_data(pseq_subset)$covariate
mod = betadisper(d = dis, group = groups, type = "median")
# Draw the Plot
labs = paste0("PCoA", 1:2, " (", signif(100 * mod$eig / sum(mod$eig), 3), "%)")
# brewer.pal(n = 8, name = "Accent")
plot(mod, pch = 15:16, cex.lab = 1.25, cex = 1,
main = label,
xlab = labs[1], ylab = labs[2], ylim = c(-0.4, 0.6), xaxt = "n",
col = c("#7FC97F", "#BEAED4"), sub = NULL,
hull = FALSE, ellipse = TRUE, conf = 0.68) # 68% data coverage for data ellipses
axis(1, at = round(seq(-0.6, 0.6, by = 0.2), 1), las = 1)
legend(x = 0.5, y = 0.2, legend = unique(groups),
col = c("#7FC97F", "#BEAED4"), pch = 15:16, cex = 0.8)
legend(x = 0.2, y = 0.6, cex = 0.7,
legend = c(paste0("p (PERMANOVA) = ", signif(permanova$`Pr(>F)`[1], 2)),
paste0("p (PERMDISP) = ", signif(permutest(mod)$tab$`Pr(>F)`[1], 2))))
cat("\n \n \n")
# Export
fig_path1 = paste0("../images/diversity/genus_beta_", covariate, ".jpeg")
jpeg(filename = fig_path1, height = 5, width = 6.25, res = 300, units = "in")
plot(mod, pch = 15:16, cex.lab = 1.25, cex = 1,
main = label,
xlab = labs[1], ylab = labs[2], ylim = c(-0.4, 0.6), xaxt = "n",
col = c("#7FC97F", "#BEAED4"), sub = NULL,
hull = FALSE, ellipse = TRUE, conf = 0.68) # 68% data coverage for data ellipses
axis(1, at = round(seq(-0.6, 0.6, by = 0.2), 1), las = 1)
legend(x = 0.5, y = 0.2, legend = unique(groups),
col = c("#7FC97F", "#BEAED4"), pch = 15:16, cex = 0.8)
legend(x = 0.2, y = 0.6, cex = 0.7,
legend = c(paste0("p (PERMANOVA) = ", signif(permanova$`Pr(>F)`[1], 2)),
paste0("p (PERMDISP) = ", signif(permutest(mod)$tab$`Pr(>F)`[1], 2))))
dev.off()
fig_path2 = paste0("../images/diversity/genus_beta_", covariate, ".pdf")
pdf(file = fig_path2, height = 5, width = 6.25)
plot(mod, pch = 15:16, cex.lab = 1.25, cex = 1,
main = label,
xlab = labs[1], ylab = labs[2], ylim = c(-0.4, 0.6), xaxt = "n",
col = c("#7FC97F", "#BEAED4"), sub = NULL,
hull = FALSE, ellipse = TRUE, conf = 0.68) # 68% data coverage for data ellipses
axis(1, at = round(seq(-0.6, 0.6, by = 0.2), 1), las = 1)
legend(x = 0.5, y = 0.2, legend = unique(groups),
col = c("#7FC97F", "#BEAED4"), pch = 15:16, cex = 0.8)
legend(x = 0.2, y = 0.6, cex = 0.7,
legend = c(paste0("p (PERMANOVA) = ", signif(permanova$`Pr(>F)`[1], 2)),
paste0("p (PERMDISP) = ", signif(permutest(mod)$tab$`Pr(>F)`[1], 2))))
dev.off()
}
# Aggregate taxa
species_data = aggregate_taxa(otu_data, "Species")
species_rarefied = rarefy_even_depth(species_data, rngseed = 1,
sample.size = 0.9 * min(sample_sums(species_data)),
replace = FALSE)
P-value is obtained by Kruskal-Wallis Rank Sum Test.
d_alpha = alpha(species_rarefied, index = "diversity_shannon")
df_alpha = data.frame(d = d_alpha$diversity_shannon,
meta(species_rarefied) %>%
dplyr::select(alcohol,
caregiver_stress_level:metabolic_syndrome_level))
covariates = colnames(df_alpha)[-1]
labels = c("Alcohol Use", "Caregiver Stress Level", "Depression Level",
"Hostility Level", "DAS Level", "Metabolic Syndrome Level")
for (i in seq_along(covariates)) {
cat("\n \n \n")
covariate = covariates[i]
label = labels[i]
tformula = formula(paste0("d ~ ", covariate))
p_val = kruskal.test(tformula, data = df_alpha)$p.value
df_ann = data.frame(x = 2.2, y = 1.01 * max(df_alpha$d), p = p_val)
df_ann = df_ann %>%
mutate(label = paste0("p = ", signif(p, 2)))
df_fig = df_alpha %>%
transmute(x = !!as.name(covariate), y = d)
df_fig = df_fig[complete.cases(df_fig), ]
p = df_fig %>%
ggplot(aes(x = x, y = y)) +
geom_violin(trim = FALSE, aes(fill = x)) +
geom_boxplot(width = 0.1, fill = "white") +
scale_fill_discrete(name = NULL) +
geom_label(data = df_ann, aes(x = x, y = y, label = label),
size = 4, vjust = -0.5, hjust = 0, color = "orange") +
labs(x = NULL, y = "Shannon’s diversity index", title = label) +
theme_bw() +
theme(strip.background = element_rect(fill = "white"),
legend.position = "bottom",
plot.title = element_text(hjust = 0.5))
print(p)
cat("\n \n \n")
fig_path1 = paste0("../images/diversity/species_alpha_", covariate, ".jpeg")
fig_path2 = paste0("../images/diversity/species_alpha_", covariate, ".pdf")
ggsave(filename = fig_path1, plot = p, height = 5, width = 6.25,
units = 'in', dpi = 300)
ggsave(filename = fig_path2, plot = p, height = 5, width = 6.25)
}
P-values are obtained by Permutational Multivariate Analysis of Variance (PERMANOVA) and Permutational Analysis of Multivariate Dispersion (PERMDISP).
for (i in seq_along(covariates)) {
cat("\n \n \n")
covariate = covariates[i]
label = labels[i]
pseq = species_rarefied
sample_data(pseq)$covariate = meta(pseq)[, covariate]
pseq_subset = subset_samples(pseq, !is.na(covariate))
set.seed(123)
# PERMANOVA
permanova = adonis(t(abundances(pseq_subset)) ~ covariate,
data = meta(pseq_subset),
permutations = 999, method = "bray")$aov.tab
# PERMDISP
dis = vegdist(t(abundances(pseq_subset)), method = "bray")
groups = sample_data(pseq_subset)$covariate
mod = betadisper(d = dis, group = groups, type = "median")
# Draw the Plot
labs = paste0("PCoA", 1:2, " (", signif(100 * mod$eig / sum(mod$eig), 3), "%)")
# brewer.pal(n = 8, name = "Accent")
plot(mod, pch = 15:16, cex.lab = 1.25, cex = 1,
main = label,
xlab = labs[1], ylab = labs[2], ylim = c(-0.4, 0.6), xaxt = "n",
col = c("#7FC97F", "#BEAED4"), sub = NULL,
hull = FALSE, ellipse = TRUE, conf = 0.68) # 68% data coverage for data ellipses
axis(1, at = round(seq(-0.6, 0.6, by = 0.2), 1), las = 1)
legend(x = 0.5, y = 0.2, legend = unique(groups),
col = c("#7FC97F", "#BEAED4"), pch = 15:16, cex = 0.8)
legend(x = 0.2, y = 0.6, cex = 0.7,
legend = c(paste0("p (PERMANOVA) = ", signif(permanova$`Pr(>F)`[1], 2)),
paste0("p (PERMDISP) = ", signif(permutest(mod)$tab$`Pr(>F)`[1], 2))))
cat("\n \n \n")
# Export
fig_path1 = paste0("../images/diversity/species_beta_", covariate, ".jpeg")
jpeg(filename = fig_path1, height = 5, width = 6.25, res = 300, units = "in")
plot(mod, pch = 15:16, cex.lab = 1.25, cex = 1,
main = label,
xlab = labs[1], ylab = labs[2], ylim = c(-0.4, 0.6), xaxt = "n",
col = c("#7FC97F", "#BEAED4"), sub = NULL,
hull = FALSE, ellipse = TRUE, conf = 0.68) # 68% data coverage for data ellipses
axis(1, at = round(seq(-0.6, 0.6, by = 0.2), 1), las = 1)
legend(x = 0.5, y = 0.2, legend = unique(groups),
col = c("#7FC97F", "#BEAED4"), pch = 15:16, cex = 0.8)
legend(x = 0.2, y = 0.6, cex = 0.7,
legend = c(paste0("p (PERMANOVA) = ", signif(permanova$`Pr(>F)`[1], 2)),
paste0("p (PERMDISP) = ", signif(permutest(mod)$tab$`Pr(>F)`[1], 2))))
dev.off()
fig_path2 = paste0("../images/diversity/species_beta_", covariate, ".pdf")
pdf(file = fig_path2, height = 5, width = 6.25)
plot(mod, pch = 15:16, cex.lab = 1.25, cex = 1,
main = label,
xlab = labs[1], ylab = labs[2], ylim = c(-0.4, 0.6), xaxt = "n",
col = c("#7FC97F", "#BEAED4"), sub = NULL,
hull = FALSE, ellipse = TRUE, conf = 0.68) # 68% data coverage for data ellipses
axis(1, at = round(seq(-0.6, 0.6, by = 0.2), 1), las = 1)
legend(x = 0.5, y = 0.2, legend = unique(groups),
col = c("#7FC97F", "#BEAED4"), pch = 15:16, cex = 0.8)
legend(x = 0.2, y = 0.6, cex = 0.7,
legend = c(paste0("p (PERMANOVA) = ", signif(permanova$`Pr(>F)`[1], 2)),
paste0("p (PERMDISP) = ", signif(permutest(mod)$tab$`Pr(>F)`[1], 2))))
dev.off()
}
sessionInfo()
R version 4.1.1 (2021-08-10)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Big Sur 10.16
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] kableExtra_1.3.4 knitr_1.33 qwraps2_0.5.2 magrittr_2.0.1
[5] compositions_2.0-2 vegan_2.5-7 lattice_0.20-44 permute_0.9-5
[9] microbiome_1.14.0 phyloseq_1.36.0 forcats_0.5.1 stringr_1.4.0
[13] dplyr_1.0.7 purrr_0.3.4 tidyr_1.1.3 tibble_3.1.3
[17] ggplot2_3.3.5 tidyverse_1.3.1 openxlsx_4.2.4 readr_2.0.1
loaded via a namespace (and not attached):
[1] Rtsne_0.15 colorspace_2.0-2 ellipsis_0.3.2
[4] XVector_0.32.0 fs_1.5.0 rstudioapi_0.13
[7] farver_2.1.0 bit64_4.0.5 fansi_0.5.0
[10] lubridate_1.7.10 xml2_1.3.2 codetools_0.2-18
[13] splines_4.1.1 robustbase_0.93-8 ade4_1.7-17
[16] jsonlite_1.7.2 broom_0.7.9 cluster_2.1.2
[19] dbplyr_2.1.1 compiler_4.1.1 httr_1.4.2
[22] backports_1.2.1 assertthat_0.2.1 Matrix_1.3-4
[25] cli_3.0.1 htmltools_0.5.1.1 tools_4.1.1
[28] igraph_1.2.6 gtable_0.3.0 glue_1.4.2
[31] GenomeInfoDbData_1.2.6 reshape2_1.4.4 Rcpp_1.0.7
[34] Biobase_2.52.0 cellranger_1.1.0 vctrs_0.3.8
[37] Biostrings_2.60.2 rhdf5filters_1.4.0 multtest_2.48.0
[40] ape_5.5 svglite_2.0.0 nlme_3.1-152
[43] iterators_1.0.13 tensorA_0.36.2 xfun_0.25
[46] rvest_1.0.1 lifecycle_1.0.0 DEoptimR_1.0-9
[49] zlibbioc_1.38.0 MASS_7.3-54 scales_1.1.1
[52] vroom_1.5.4 hms_1.1.0 parallel_4.1.1
[55] biomformat_1.20.0 rhdf5_2.36.0 yaml_2.2.1
[58] stringi_1.7.3 highr_0.9 S4Vectors_0.30.0
[61] foreach_1.5.1 BiocGenerics_0.38.0 zip_2.2.0
[64] GenomeInfoDb_1.28.1 rlang_0.4.11 pkgconfig_2.0.3
[67] systemfonts_1.0.2 bitops_1.0-7 evaluate_0.14
[70] Rhdf5lib_1.14.2 labeling_0.4.2 bit_4.0.4
[73] tidyselect_1.1.1 plyr_1.8.6 R6_2.5.0
[76] IRanges_2.26.0 generics_0.1.0 DBI_1.1.1
[79] pillar_1.6.2 haven_2.4.3 withr_2.4.2
[82] mgcv_1.8-36 survival_3.2-12 RCurl_1.98-1.3
[85] bayesm_3.1-4 modelr_0.1.8 crayon_1.4.1
[88] utf8_1.2.2 tzdb_0.1.2 rmarkdown_2.10
[91] grid_4.1.1 readxl_1.3.1 data.table_1.14.0
[94] webshot_0.5.2 reprex_2.0.1 digest_0.6.27
[97] stats4_4.1.1 munsell_0.5.0 viridisLite_0.4.0